## R Script Output -------------------------------------------------------------
# Figure 2: Firm Size and Immigration Lobbying, 2017


## Instructions ----------------------------------------------------------------
# Step 1: Adjust MAIN_DIR to where README.txt is located
# Step 2: Run entire script


## IMPORTANT NOTE --------------------------------------------------------------
# This figure uses Orbis' proprietary data on firm sales. To protect that
# data, this script loads an anonymized firm-level dataset from which firm
# identifiers have been removed and whose rows have been randomly reshuffled.
# Together, these two steps prevent sales figures from being linked back to
# individual firms. The anonymized data were created with the code below.
# 
# # load main dataset
# load(file = paste(MAIN_DIR, "data-merge-91-17.RData", sep = "/"))
# 
# # subset
# data.17 <- data.merge %>%
#   select(year, 
#          lob_img_2017,
#          sales, 
#          est_img_expense_fy) %>%
#   filter(year == 2017)
# 
# # reshuffle rows
# rows <- sample(nrow(data.17))
# data.17.reshuffled <- data.17[rows, ]
# 
# # save
# save(data.17.reshuffled, 
#      file = paste(MAIN_DIR, "Figure-2-data-anonymous.RData", sep = "/"))


## setup -----------------------------------------------------------------------
# clean slate
# NOTE: this removes all visible objects from the current workspace
rm(list = ls())
date()

# load packages
pkg <- c("tidyverse",
         "RColorBrewer", 
         "gridExtra", 
         "viridis")

# library() stops with an error when a package is not installed; require()
# would only return FALSE with a warning and let the script fail later on
invisible(lapply(pkg, library, character.only = TRUE))

# set main directory (must be adjusted by the user; see Instructions above)
MAIN_DIR <- "~/Dropbox/Research/JOP-h1b-replication"


## set parameters and functions ------------------------------------------------
# Smoothing layer that fits a binomial GLM to the plotted x/y aesthetics.
# Any arguments are forwarded unchanged to geom_smooth().
binomial_smooth <- function(...) {
  glm.args <- list(family = "binomial")
  geom_smooth(method = "glm", method.args = glm.args, ...)
}

# base font size; the plot title, axis titles and axis text in both panels
# are sized relative to this value (+2, +1 and -2 respectively)
axis.title.size <- 12


## load data -------------------------------------------------------------------
# read the anonymized firm-level data (provides `data.17.reshuffled`)
load(file = file.path(MAIN_DIR, "Figure-2-data-anonymous.RData"))

# add log sales; adding 1 keeps the log defined when sales is 0
data.17 <- mutate(data.17.reshuffled,
                  log_sales = log(sales + 1))


## extensive margin ------------------------------------------------------------
# logistic regression of the 2017 lobbying indicator on log sales
m.sale.lobby <- glm(formula = lob_img_2017 ~ log_sales,
                    family = "binomial",
                    data = data.17)
summary(m.sale.lobby)

# text shown inside the panel: slope, its p-value, and the model call
p.value.lobby <- coef(summary(m.sale.lobby))["log_sales", "Pr(>|z|)"]
notes.lobby <- c(
  paste0("Coef = ", round(coef(m.sale.lobby)["log_sales"], 2)),
  paste0("p-value = ", sprintf("%.4f", round(p.value.lobby, 4))),
  "glm(y ~ x, family = 'binomial')"
)

# one left-aligned text layer per note, stacked from top to bottom at x = 1
note.layers.lobby <- Map(
  function(height, text) {
    annotate("text",
             x = 1,
             y = height,
             label = text,
             color = "black",
             hjust = 0,
             size = 3.5)
  },
  c(0.32, 0.26, 0.2),
  notes.lobby
)

# plot
p.sales.lobby <- ggplot(data = data.17,
                        mapping = aes(x = log_sales, y = lob_img_2017)) +
  # points are jittered vertically so the 0/1 outcomes do not overplot; the
  # jitter seed is fixed, so the point positions are reproducible across runs
  # (the fitted curve does not depend on the jitter)
  geom_jitter(position = position_jitter(height = 0.02, seed = 1234),
              alpha = 0.4) +
  binomial_smooth() +
  scale_y_continuous(name = "Immigration Lobbying (1 = Yes, 0 = No)",
                     breaks = c(0, 1),
                     labels = c(0, 1)) +
  scale_x_continuous(name = "Log Sales") +
  labs(title = "Extensive Margin") +
  theme_bw() +
  theme(
    plot.title = element_text(size = axis.title.size + 2,
                              face = "bold",
                              hjust = 0.5,
                              margin = margin(t = 0, r = 0, b = 10, l = 0)),
    axis.title.x = element_text(size = axis.title.size + 1,
                                margin = margin(t = 10, r = 0, b = 0, l = 0)),
    axis.title.y = element_text(size = axis.title.size + 1,
                                margin = margin(t = 0, r = 10, b = 0, l = 0)),
    axis.text = element_text(size = axis.title.size - 2),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  ) +
  note.layers.lobby


## intensive margin ------------------------------------------------------------
# OLS of log lobbying expenditure on log sales, restricted to firms that
# lobbied in 2017 (the data expression is kept inline so that the call
# printed by summary() is unchanged)
m.sale.exp <- lm(formula = log(est_img_expense_fy) ~ log_sales,
                 data = data.17 %>% 
                   filter(lob_img_2017 == 1))

summary(m.sale.exp)

# text shown inside the panel: slope, its p-value, and the model call
p.value.exp <- coef(summary(m.sale.exp))["log_sales", "Pr(>|t|)"]
notes.exp <- c(
  paste0("Coef = ", round(coef(m.sale.exp)["log_sales"], 2)),
  paste0("p-value = ", sprintf("%.4f", round(p.value.exp, 4))),
  "lm(y ~ x)"
)

# one left-aligned text layer per note, stacked from top to bottom at x = 18
note.layers.exp <- Map(
  function(height, text) {
    annotate("text",
             x = 18,
             y = height,
             label = text,
             color = "black",
             hjust = 0,
             size = 3.5)
  },
  c(9.6, 9.2, 8.8),
  notes.exp
)

# plot
p.sales.exp <- ggplot(data = filter(data.17, lob_img_2017 == 1),
                      mapping = aes(x = log_sales,
                                    y = log(est_img_expense_fy))) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm") +
  scale_y_continuous(name = "Log Immigration Lobbying Expenditure") +
  scale_x_continuous(name = "Log Sales") +
  labs(title = "Intensive Margin") +
  theme_bw() +
  theme(
    plot.title = element_text(size = axis.title.size + 2,
                              face = "bold",
                              hjust = 0.5,
                              margin = margin(t = 0, r = 0, b = 10, l = 0)),
    axis.title.x = element_text(size = axis.title.size + 1,
                                margin = margin(t = 10, r = 0, b = 0, l = 0)),
    axis.title.y = element_text(size = axis.title.size + 1,
                                margin = margin(t = 0, r = 10, b = 0, l = 0)),
    axis.text = element_text(size = axis.title.size - 2),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  ) +
  note.layers.exp


## combine plots and save ------------------------------------------------------
# Draw both panels side by side into a single PDF file in MAIN_DIR.
# dev.off() runs in a `finally` clause so the PDF device is closed even if
# drawing the panels fails; otherwise the device would stay open and capture
# later plots.
pdf(file = file.path(MAIN_DIR, "Figure-2.pdf"), 
    width = 8.2, height = 4.1)
invisible(tryCatch(
  grid.arrange(p.sales.lobby, p.sales.exp,
               ncol = 2, nrow = 1),
  finally = dev.off()
))
